/*===========================================================================
Project: 	INCREASING THE DEMAND FOR WORKERS WITH A CRIMINAL RECORD
Info: 		Runs additional results.
Program: 	5_additionalresults.do
Created: 	January 4, 2022
Edited:  	July 8, 2022 by KG
Purpose: 	Runs additional results referenced in paper.
Note: 		Set the working directory to the Replication folder that contains this do file.
===========================================================================*/
cap log close
clear all  
version 16, permanently 
discard 

* Install Packages
ssc install gtools, replace
ssc install egenmore, replace
ssc install estout, replace

***** Set paths.
global Main "`c(pwd)'"
cd "$Main"

* Data directory
global Data "$Main/analysis_data"

* Table directory 
global Tables "$Main/output_tables"

* set seed
set seed 89449
set sortseed 12345


/*==================================================================
*** RUN A LOG FILE ***
==================================================================*/
global date = "$S_DATE"
global LogPath = "$Main/log"
capture log close
log using "$LogPath/WCC_$date.log", append

/*==================================================================
Linear Extrapolation of Effective Wage on Hiring of WCs 
==================================================================*/
use "$Data/main_survey_wide.dta", clear
reg hire_sub effective_wage,  cluster(firm_id)

**Calculating wage subsidy that is equivalent to a 12 percentage point increase in demand for WCs
display 100 - (_b[effective_wage]*100 + 0.12) / _b[effective_wage]

**Calculating wage subsidy that is equivalent to a 22 percentage point increase in demand for WCs
display 100 - (_b[effective_wage]*100 + 0.22) / _b[effective_wage]

**Calculating wage subsidy that is equivalent to a 10.5 percentage point increase in demand for WCs
display 100 - (_b[effective_wage]*100 + 0.105) / _b[effective_wage]

**Calculating wage subsidy that is equivalent to a 13 percentage point increase in demand for WCs
display 100 - (_b[effective_wage]*100 + 0.13) / _b[effective_wage]

**Calculating wage subsidy that is equivalent to a 13.3 percentage point increase in demand for WCs
display 100 - (_b[effective_wage]*100 + 0.133) / _b[effective_wage]

**Calculating wage subsidy that is equivalent to a 6 percentage point increase in demand for WCs
display 100 - (_b[effective_wage]*100 + 0.06) / _b[effective_wage]

/*==================================================================
2019 tabulation of worker ratings (requires raw proprietary data)
==================================================================*/

import delimited "$Main/input_data/ratings_2019_1.csv", delimiter(comma) bindquote(strict) encoding(UTF-8) clear 
tempfile ratings1
sa `ratings1'
import delimited "$Main/input_data/ratings_2019_2.csv", delimiter(comma) bindquote(strict) encoding(UTF-8) clear
append using `ratings1'
gen flag_approved = 1 if job_state == "approved"
bysort user_id (flag_approved created_at): gen first_approved = _n == 1
replace first_approved = 0 if flag_approved != 1

bysort user_id (created_at): gen first = _n == 1
keep if created_at < "2020"

duplicates drop

*tabulation of ratings without withdrawn jobs in 2019
tab rating if rating > 0 & job_state != "withdrawn"

*tabulation of all ratings in 2019
tab rating if rating > 0


/*=========================================================================
Prior Beliefs about the Performance of WCs
===========================================================================*/
use "$Data/main_survey_wide.dta", clear

** Average Prior Belief of WC earning a low performance rating
sum performance_percent_pre if info_type == 2 & info_randomization == 1 & subsidy_rate == 0 
*15.629% vs actual value of 3% -- overestimate by 12.629 percentage points
local pre_low = r(mean)
local sd_low = r(sd)

sum performance_percent_post if info_type == 2 & info_randomization == 1 & subsidy_rate == 0 
*9.74%
local post_low = r(mean)

di `post_low' - `pre_low'
*-5.887 percentage point change in low-performance beliefs

di (`post_low' - `pre_low') / `pre_low'
*-37.668% change

di (`post_low' - `pre_low') / `sd_low'
*-0.243 standard deviations


** Average Prior Belief of WC earning a 5-star rating
sum performance_percent_pre if info_type == 1 & info_randomization == 1 & subsidy_rate == 0 
*72.53% vs actual value of 87% -- underestimate by 14.47 percentage points
local pre_high = r(mean)
local sd_high = r(sd)
sum performance_percent_post if info_type == 1 & info_randomization == 1 & subsidy_rate == 0 
*82.089
local post_high = r(mean)

di `post_high' - `pre_high'
*9.55 percentage point change in high-performance beliefs

di (`post_high' - `pre_high') / `pre_high'
*13.168% change

di (`post_high' - `pre_high') / `sd_high'
*0.436 standard deviations 

/*=========================================================================
Percent of Hiring Managers not Treated to Information who have same
Prior and Posterior Hiring Willingness
===========================================================================*/
use "$Data/main_survey_wide.dta", clear
gen same_hire_response = hire_sub == hire_sub_posterior
tab same_hire_response if info_randomization == 0 & !missing(hire_sub_posterior) & !missing(hire_sub)


/*=========================================================================
Number of Firms and Hiring Managers in Full Sample and No Wage Subsidy Sample
===========================================================================*/
use "$Data/main_survey_wide.dta", clear
distinct firm_id
distinct firm_id if subsidy_rate == 0 

/*=========================================================================
Number of Firms and Hiring Managers in Shown Objective Information in 
Full Sample and No Wage Subsidy Sample
===========================================================================*/
use "$Data/main_survey_wide.dta", clear
distinct firm_id if info_randomization == 1 
distinct firm_id if info_randomization == 1 & subsidy_rate == 0


/*=========================================================================
Industry Shares Correlation with Current/January Employment Ratio
===========================================================================*/
import excel "$Main/input_data/us_naicssector_large_emplsize_2019.xlsx", sheet("US large employment sizes") firstrow clear
drop if NAICSDescription == "Total"

collapse (sum) Firms Establishments Employment, by(NAICSDescription)

egen total_census_firms = total(Firms)
gen pct_industry_census_all = Firms / total_census_firms
gen industry = ""
replace industry = "Construction" if inlist(NAICSDescription, "Construction")
replace industry = "Finance, Insurance, \& Real Estate" if inlist(NAICSDescription, "Finance and Insurance", "Real Estate and Rental and Leasing")
replace industry = "Manufacturing" if inlist(NAICSDescription, "Manufacturing")
replace industry = "Retail" if inlist(NAICSDescription, "Retail Trade", "Wholesale Trade")
replace industry = "Service" if inlist(NAICSDescription, "Accommodation and Food Services", "Administrative and Support and Waste Management and Remediation Services", "Arts, Entertainment, and Recreation", "Educational Services", "Health Care and Social Assistance", "Information", "Management of Companies and Enterprises",  "Other Services (except Public Administration)", "Professional, Scientific, and Technical Services")
replace industry = "Transportation \& Public Utilities" if inlist(NAICSDescription, "Transportation and Warehousing", "Utilities")
replace industry = "Nonclassifiable" if inlist(NAICSDescription, "Industries not classified")

collapse (sum) pct_industry_census_all, by(industry)

tempfile census_industry
save `census_industry'

* First, we want to get the data that we have in the panel itself 
use "$Data/main_survey_wide.dta", clear 

* Collapse by firm_id
collapse (firstnm) industry sic1 (mean) platform_tenure estab_age empsize wcc_policy shrm_best_candidate shrm_second_chance shrm_incentivized shrm_customers shrm_regulations shrm_performance months_on_platform, by(firm_id)

* Generate industry indicators
tab industry
replace industry = "Retail" if industry == "Wholesale Trade"
replace industry = "Service" if industry == "Public Administration"
gen counter = 1 if industry != ""

collapse (sum) counter, by(industry)
egen total_industry = total(counter)
gen pct_industry = counter / total_industry

merge 1:1 industry using `census_industry'

* Census industry shares for firms with fewer than 500 employees from Appendix Table of "The impact of COVID-19 on small business outcomes and expectations" (2020) by Bartik, Bertrand, and Cullen
gen pct_industry_census = .
replace pct_industry_census = 0.117  if industry == "Construction"
replace pct_industry_census = 0.04 + 0.052 if industry == "Finance, Insurance, \& Real Estate"
replace pct_industry_census = 0.041 if industry == "Manufacturing"
replace pct_industry_census = 0.157 if industry == "Retail"
replace pct_industry_census = 0.013 + 0.135 + 0.003 + 0.058 + 0.015 + 0.109 + 0.022 + 0.09 + 0.116  if industry == "Service"
replace pct_industry_census = 0.031 + 0.001  if industry == "Transportation \& Public Utilities"

gen current_jan_emp_ratio = .
replace current_jan_emp_ratio = 0.66 if industry == "Construction"
replace current_jan_emp_ratio = 0.81*(4/9.2) + 0.70*(5.2/9.2) if industry == "Finance, Insurance, \& Real Estate" // Current/January Employment Ratio for Banking/finance is 0.81 and Real estate is 0.70, 0.755 is the mean
replace current_jan_emp_ratio = . if industry == "Manufacturing"
replace current_jan_emp_ratio = . if industry == "Nonclassifiable"
replace current_jan_emp_ratio = . if industry == "Public Administration"
replace current_jan_emp_ratio = 0.49 if industry == "Retail" // Current/January Employment Ratio is for Retailers, except grocery (Wholesale trade is combined with Retailers in the calculation of the ratio as well)
replace current_jan_emp_ratio = 0.40*(2.2/(11.6+13.5+4.5+4.5+10.9+2.2)) + 0.35*(11.6/(11.6+13.5+4.5+4.5+10.9+2.2)) + 0.80*(13.5/(11.6+13.5+4.5+4.5+10.9+2.2)) + 0.24*(4.5/(11.6+13.5+4.5+4.5+10.9+2.2)) + 0.30*(4.5/(11.6+13.5+4.5+4.5+10.9+2.2)) + 0.69*(10.9/(11.6+13.5+4.5+4.5+10.9+2.2))  if industry == "Service" // Current/January Employment Ratio for Personal Services is 0.35, Professional services is 0.80, Restaurants/bar/catering is 0.24, Tourism/lodging is 0.30, and Health care is 0.69, Arts & Entertainment is 0.40

replace current_jan_emp_ratio = . if industry == "Transportation \& Public Utilities"
drop if industry == ""


replace industry = "Other" if inlist(industry, "Transportation \& Public Utilities", "Manufacturing", "Nonclassifiable")

collapse (sum) pct_industry pct_industry_census_all pct_industry_census current_jan_emp_ratio, by(industry)

replace current_jan_emp_ratio = 0.70 if industry == "Other"
gen ratio_to_census_all = pct_industry / pct_industry_census_all
gen ratio_to_census_small = pct_industry / pct_industry_census


corr(pct_industry current_jan_emp_ratio)
corr(ratio_to_census_small current_jan_emp_ratio)
corr(ratio_to_census_all current_jan_emp_ratio)


/*=========================================================================
Felony vs. Misdemeanor p-values
===========================================================================*/
use "$Data/main_survey_longcrime.dta", clear
replace hire = 100*hire
gen no_conviction_restriction = violent_fel != 1 & prop_fin_fel != 1 & substance_fel != 1 & violent_mis != 1 & prop_fin_mis != 1 & substance_mis != 1 
local row=1
mat R=J(12,5,.)

* No Subsidy Sample
* Estimates by conviction type relative to violent felony
reg hire prop_fin_fel substance_fel ///
violent_mis prop_fin_mis substance_mis no_conviction_restriction if subsidy_rate == 0, cluster(firm_id) 

* p-value for felony vs. misdemeanor violent crime conviction 
local t = _b[violent_mis]/_se[violent_mis]
local p =2*ttail(e(df_r),abs(`t'))
local p_value_violent = string(`p', "%8.2f")
di "p-value of felony vs. misdemeanor violent crime conviction restriction in the no subsidy sample is `p_value_violent'"

* Estimates by conviction type relative to property/financial felony
reg hire violent_fel substance_fel ///
violent_mis prop_fin_mis substance_mis no_conviction_restriction if subsidy_rate == 0, cluster(firm_id)

*p-value for felony vs. misdemeanor property/financial crime conviction 
local t = _b[prop_fin_mis]/_se[prop_fin_mis]
local p =2*ttail(e(df_r),abs(`t'))
local p_value_prop_fin = string(`p', "%8.2f")
di "p-value of felony vs. misdemeanor property/financial crime conviction restriction in the no subsidy sample is `p_value_prop_fin'"

* Estimates by conviction type relative to substance felony
reg hire violent_fel prop_fin_fel ///
violent_mis prop_fin_mis substance_mis no_conviction_restriction if subsidy_rate == 0, cluster(firm_id)

* p-value for felony vs. misdemeanor drug-related crime conviction 
local t = _b[substance_mis]/_se[substance_mis]
local p =2*ttail(e(df_r),abs(`t'))
local p_value_drug = string(`p', "%8.2f")
di "p-value of felony vs. misdemeanor drug-related crime conviction restriction in the no subsidy sample is `p_value_drug'"


* Full Sample
* Estimates by conviction type relative to violent felony
reg hire prop_fin_fel substance_fel ///
violent_mis prop_fin_mis substance_mis no_conviction_restriction  i.subsidy_rate, cluster(firm_id)

* p-value for felony vs. misdemeanor violent crime conviction 
local t = _b[violent_mis]/_se[violent_mis]
local p =2*ttail(e(df_r),abs(`t'))
local p_value_violent_full = string(`p', "%8.2f")
di "p-value of felony vs. misdemeanor violent crime conviction restriction in the full sample is `p_value_violent_full'"

* Estimates by conviction type relative to property/financial felony
reg hire violent_fel substance_fel ///
violent_mis prop_fin_mis substance_mis no_conviction_restriction  i.subsidy_rate, cluster(firm_id)

* p-value for felony vs. misdemeanor property/financial crime conviction 
local t = _b[prop_fin_mis]/_se[prop_fin_mis]
local p =2*ttail(e(df_r),abs(`t'))
local p_value_prop_fin_full = string(`p', "%8.2f")
di "p-value of felony vs. misdemeanor property/financial crime conviction restriction in the full sample is `p_value_prop_fin_full'"

* Estimates by conviction type relative to substance felony
reg hire violent_fel prop_fin_fel ///
violent_mis prop_fin_mis substance_mis no_conviction_restriction i.subsidy_rate, cluster(firm_id)

* p-value for felony vs. misdemeanor drug-related crime conviction 
local t = _b[substance_mis]/_se[substance_mis]
local p =2*ttail(e(df_r),abs(`t'))
local p_value_drug_full = string(`p', "%8.2f")
di "p-value of felony vs. misdemeanor drug-related crime conviction restriction in the full sample is `p_value_drug_full'"


/*=========================================================================
Insurance and Violent Felony 
===========================================================================*/
use "$Data/main_survey_wide.dta", clear

* $5 Million insurance cap
table hire_sub hire_ins hire_violent_fel if subsidy_rate == 0 & ins_cap == 5000000
tab hire_sub if subsidy_rate == 0 & ins_cap == 5000000

tab hire_ins if subsidy_rate == 0 & ins_cap == 5000000 & hire_sub == 1
tab hire_ins if subsidy_rate == 0 & ins_cap == 5000000 & hire_sub == 0

tab hire_violent_fel if subsidy_rate == 0 & ins_cap == 5000000 & hire_sub == 1 & hire_ins == 1
tab hire_violent_fel if subsidy_rate == 0 & ins_cap == 5000000 & hire_sub == 1 & hire_ins == 0
tab hire_violent_fel if subsidy_rate == 0 & ins_cap == 5000000 & hire_sub == 0 & hire_ins == 1
tab hire_violent_fel if subsidy_rate == 0 & ins_cap == 5000000 & hire_sub == 0 & hire_ins == 0

* all crime and safety insurance caps
tab hire_sub if subsidy_rate == 0

tab hire_ins if subsidy_rate == 0 & hire_sub == 1
tab hire_ins if subsidy_rate == 0 & hire_sub == 0

tab hire_violent_fel if subsidy_rate == 0 & hire_sub == 1 & hire_ins == 1
tab hire_violent_fel if subsidy_rate == 0 & hire_sub == 1 & hire_ins == 0
tab hire_violent_fel if subsidy_rate == 0 & hire_sub == 0 & hire_ins == 1
tab hire_violent_fel if subsidy_rate == 0 & hire_sub == 0 & hire_ins == 0

* different version of same thing
use "$Data/main_survey_wide.dta", clear
pwcorr hire_violent_fel hire_ins if ins_cap==5000000
pwcorr hire_violent_fel hire_ins if hire_sub == 0

use "$Data/main_survey_wide.dta", clear
keep if ins_cap==5000000
foreach yX in 1 0 {
	sum hire_violent_fel if hire_ins==`yX'
}

/*=========================================================================
Difference in hiring for 10% and 25% Wage Subsidy
===========================================================================*/
use "$Data/main_survey_wide.dta", clear

* Normalize outcome variable
replace hire_sub = 100 * hire_sub 

* Restrict sample to 10% and 25% wage subsidy groups for comparison
keep if subsidy_rate == 10 | subsidy_rate == 25

* Calculate p-value that the two groups differ in hire_sub
reg hire_sub i.subsidy_rate, cluster(firm_id)
local t = _b[25.subsidy_rate]/_se[25.subsidy_rate]
local p = string(2*ttail(e(df_r),abs(`t')), "%8.2f")

di "p-value of difference in hiring rate for 10% and 25% subsidies is `p'"

/*=========================================================================
Requiring one job vs. requiring 5 and 25 jobs
===========================================================================*/
use "$Data/main_survey_long.dta", clear

* Normalize outcome variable
replace hire_hist = 100 * hire_hist

* No subsidy group
reg hire_hist ib1.past_jobs if subsidy_rate == 0, cluster(firm_id)

local t1 = _b[5.past_jobs]/_se[5.past_jobs]
local p1 = string(2*ttail(e(df_r),abs(`t1')), "%8.2f")
di "p-value of difference in hiring rate for requiring 1 and 5 jobs is `p1'"

local t2 = _b[25.past_jobs]/_se[25.past_jobs]
local p2 = string(2*ttail(e(df_r),abs(`t2')), "%8.2f")
di "p-value of difference in hiring rate for requiring 1 and 25 jobs is `p2'"

* Full sample
reg hire_hist ib1.past_jobs i.subsidy_rate, cluster(firm_id)

local t1 = _b[5.past_jobs]/_se[5.past_jobs]
local p1 = string(2*ttail(e(df_r),abs(`t1')), "%8.2f")
di "p-value of difference in hiring rate for requiring 1 and 5 jobs is `p1'"

local t2 = _b[25.past_jobs]/_se[25.past_jobs]
local p2 = string(2*ttail(e(df_r),abs(`t2')), "%8.2f")
di "p-value of difference in hiring rate for requiring 1 and 25 jobs is `p2'"

/*=========================================================================
$5,000 insurance cap vs. $100,000 and $5m insurance caps
===========================================================================*/
use "$Data/main_survey_long.dta", clear

* Normalize outcome variable
replace hire_ins = 100 * hire_ins

* No subsidy group
reg hire_ins ib5000.ins_cap if subsidy_rate == 0, cluster(firm_id)

local t1 = _b[100000.ins_cap]/_se[100000.ins_cap]
local p1 = string(2*ttail(e(df_r),abs(`t1')), "%8.2f")
di "p-value of difference in hiring rate for $5k and $100k insurance caps is `p1'"

local t2 = _b[5000000.ins_cap]/_se[5000000.ins_cap]
local p2 = string(2*ttail(e(df_r),abs(`t2')), "%8.2f")
di "p-value of difference in hiring rate for $5k and $5m insurance caps is `p2'"

* Full sample
reg hire_ins ib5000.ins_cap i.subsidy_rate, cluster(firm_id)

local t1 = _b[100000.ins_cap]/_se[100000.ins_cap]
local p1 = string(2*ttail(e(df_r),abs(`t1')), "%8.2f")
di "p-value of difference in hiring rate for $5k and $100k insurance caps is `p1'"

local t2 = _b[5000000.ins_cap]/_se[5000000.ins_cap]
local p2 = string(2*ttail(e(df_r),abs(`t2')), "%8.2f")
di "p-value of difference in hiring rate for $5k and $5m insurance caps is `p2'"
